import requests        #导入requests包
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import traceback
from bs4 import BeautifulSoup
import re
import threading
from concurrent.futures import ThreadPoolExecutor
from collections import deque
import queue
from urllib.request import urlopen
from urllib.parse import urlparse,urlunparse
import os
import json
from concurrent.futures import ThreadPoolExecutor,as_completed

from PaperParser import getAbstractMuti

from contextlib import closing

#禁用 verification 警告
#urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
#sun3dDict = "I:\\"
#savesaveDictDict = "E:\论文\目标检测\CVPR2020"
saveDict = "E:\论文\语义分割"
ICCV2021=["https://openaccess.thecvf.com/ICCV2021?day=all"]
CVPR2021=["https://openaccess.thecvf.com/CVPR2021?day=all"]
WACV2021=["https://openaccess.thecvf.com/WACV2021"]
CVPR2020=["https://openaccess.thecvf.com/CVPR2020?day=2020-06-16",
          "https://openaccess.thecvf.com/CVPR2020?day=2020-06-17",
          "https://openaccess.thecvf.com/CVPR2020?day=2020-06-18"]
WACV2020=["https://openaccess.thecvf.com/WACV2020"]
CVPR2019=["https://openaccess.thecvf.com/CVPR2019?day=2019-06-18",
          "https://openaccess.thecvf.com/CVPR2019?day=2019-06-19",
          "https://openaccess.thecvf.com/CVPR2019?day=2019-06-20"]
ICCV2019=["http://openaccess.thecvf.com/ICCV2019?day=2019-10-29",
          "https://openaccess.thecvf.com/ICCV2019?day=2019-10-30",
          "https://openaccess.thecvf.com/ICCV2019?day=2019-10-31",
          "https://openaccess.thecvf.com/ICCV2019?day=2019-11-01"]
allCVF = [WACV2021,CVPR2021,ICCV2021,CVPR2020,WACV2020,CVPR2019,ICCV2019]
wallpapershome=[
                # "https://wallpapershome.com/girls/celebrities/",
                # "https://wallpapershome.com/girls/models/",
                # "https://wallpapershome.com/girls/illustrations/",
                "https://wallpapershome.com/girls/?page=1",
                "https://wallpapershome.com/celebrities/?page=1"
                ]
hdwallsource = [
                "https://hdwallsource.com/category/celebrities/" ,
                #"https://hdwallsource.com/category/women/",
                ]
alphacoders  = [
                "https://wall.alphacoders.com/by_sub_category.php?id=169002&name=Model+Wallpapers",
                ]
thecvf = "http://openaccess.thecvf.com/"
sun3dhead = "http://sun3d.cs.princeton.edu/sfm/"
head = "https://wallpapershome.com"


#keyWord = "point_cloud"    #cvf链接中使用_填充单词空格
#keyWord = "object_detection"
#keyWord = ["Dehazing","Deraining","Deblurring"]
#keyWord = ["GAN","generative_adversarial_network"]
#keyWord = ["Semi-Supervised","unsupervised"]
#keyWord = ["celebrities","models","illustrations","actors","artists"]
#keyWord = ["Semantic_segmentation"]
keyWord = ["Semantic_segmentation","smoke_segmentation"]

threads = []
threadID = 1
queueLock = threading.Lock()
workQueue = queue.Queue(1000)

#head = thecvf

class myThread(threading.Thread):  # 继承父类threading.Thread
    def __init__(self,counter,object):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.counter = counter
        self.object = object

    def run(self):  # 把要执行的代码写到run函数里面 线程在创建后会直接运行run函数
        print("Starting " + self.name)
        savedPDF = self.object.saveFile(self.object.now_link,self.object.now_key)
        iter = next(savedPDF, None)
        if iter:
            getAbstractMuti(iter)
        print("Exiting " + self.name)

class paper_down():
    def __init__(self,urls=None,keyWord=None,head=""):
        self.keyWord = keyWord
        self.urls = urls
        self.links = {}
        #self.head =  urlunparse(urlparse(urls[0])[:2]) # (scheme, netloc, path, parameters, query, fragment)
        #self.head = "https://hdwallsource.com"
        self.head = head
        self.pool = ThreadPoolExecutor(max_workers=15)
        self.re = None
        self.bibtexSelectionXPath = "//*[@id='content']/dl/dd[3]/div/div"

    def threadDown(self):
        for key,value in self.links.items():
            for link in value:
                print(link)
                pdfUrl = self.head+link['href']
                self.now_key = key
                self.now_link = pdfUrl
                thread = myThread(workQueue,self)
                thread.start()
                threads.append(thread)

    def saveFile(self,url,keyWord):
        pathName = ""
        if keyWord:
            pathName = os.path.join(os.path.join(saveDict, keyWord))
        else:
            pathName = os.path.join(os.path.join(saveDict, "celebrities"))
        if not os.path.exists(pathName):
            try:
                os.makedirs(pathName)
            except (FileExistsError):
                pass

        file_name = os.path.join(pathName, url.split('/')[-1])
        if os.path.exists(file_name):
            print("{} is already exists".format(file_name))
            return

        u = urlopen(url)
        f = open(file_name, 'wb+')
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            f.write(buffer)
        f.close()
        print("Sucessful to download" + " " + file_name)
        yield file_name

    def getEndNoteImport(self):
        '''
        @InProceedings{Kim_2021_ICCV,
        author    = {Kim, Chris Dongjoo and Jeong, Jinseo and Moon, Sangwoo and Kim, Gunhee},
        title     = {Continual Learning on Noisy Data Streams via Self-Purified Replay},
        booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
        month     = {October},
        year      = {2021},
        pages     = {537-547}
        }
        '''
        pass


    def paper(self):
        '''
        获取pdf链接
        :return:
        '''
        for url in self.urls:
            response = requests.get(url, verify=False)
            content = response.content
            soup = BeautifulSoup(content, "html.parser", from_encoding="utf-8")
            # 获取所有的链接
            for k in keyWord:
                PDFlinks = soup.find_all('a', href=re.compile(k+".*.pdf", re.I))
                self.links[str(k)] = PDFlinks
                print(self.links)
                self.threadDown()

        for t in threads:
            t.join()

# 变量名转字符串
import inspect
def retrieve_name(var):
    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
    return [var_name for var_name, var_val in callers_local_vars if var_val is var]
# 以字符串作为变量名
#globals()[i]

if __name__ == '__main__':
    namelist = allCVF
    for list in allCVF:
        print(retrieve_name(list)[0])
        os.path.join(saveDict,retrieve_name(list)[0])
        paper = paper_down(list,keyWord,thecvf)
        paper.paper()

# assert n != 0, 'n is zero!'   #如果断言失败，assert语句本身就会抛出AssertionError,启动Python解释器时可以用-O参数来关闭assert
# raise [exceptionType[,argument][,traceback]]   #raise 引发异常，有try-except捕获
# try:
# except ([FileExistsError,FileNotFoundError]) as fee:
# except (Exception ): # 常见错误基类
# else:
# finally:

#popover-download-button > div:nth-child(2) > div > ul:nth-child(4) > li > a
#popover-download-button > div:nth-child(2) > div > ul:nth-child(4) > li > a
#content > div.col-right > div.inner.inner-params > div.block-download > div > div > div.block-download__resolutions--6 > p:nth-child(1) > a
#content > div.col-right > div.inner.inner-params > div.block-download > div > div > div.block-download__resolutions--5 > p:nth-child(1) > a